# -*- coding: utf-8 -*-
from BeautifulSoup import BeautifulSoup
import urllib
import re
import time
import os
def getDate():
    """Return today's local date formatted as ``YYYY-MM-DD``.

    The result is always a text string: ``unicode`` on Python 2 (as
    before) and ``str`` on Python 3, where the ``unicode`` builtin that
    the previous implementation relied on does not exist.
    """
    # strftime() without an explicit time tuple formats the current local
    # time, which is exactly what localtime(time.time()) spelled out.
    stamp = time.strftime("%Y-%m-%d")
    if isinstance(stamp, bytes):
        # Python 2: strftime() returns a byte string. This format only
        # produces ASCII digits and dashes, so an ASCII decode is safe.
        stamp = stamp.decode("ascii")
    return stamp
	
def write2db(filename, entry):
    """Append ``entry`` to the file ``filename`` as UTF-8 encoded bytes.

    filename -- path of the file to append to (opened in append mode).
    entry    -- text to append; it is encoded as UTF-8 before writing.
    """
    # The payload is already encoded, so the file is opened in binary
    # append mode; a text-mode handle rejects bytes on Python 3.
    # The with-block closes the handle even when write() raises.
    with open(filename, 'ab') as hdb:
        hdb.write(entry.encode('utf-8'))

def writeHeader2db(filename, header):
    """Write ``header`` to ``filename`` only if that file does not exist yet.

    When ``filename`` is already a regular file nothing is written, so the
    header line is not repeated on later runs. Always returns None.
    """
    if not os.path.isfile(filename):
        write2db(filename, header)

def getEntryStr(key, valuelist):
    '''
    Build one tab-separated line: ``key`` first, then every item of
    ``valuelist`` in order, terminated by a newline.
    '''
    fields = [key]
    fields.extend(valuelist)
    return "\t".join(fields) + "\n"
	
def getBuildingAddress(url, urlbase):
    """Collect the per-building page addresses linked from ``url``.

    The page at ``url`` is downloaded and parsed as GBK. Every ``<a>`` tag
    whose ``href`` matches ``^lpxx-xs-1.+`` and whose text is empty
    contributes ``urlbase + href`` to the result.

    url     -- address of the page to scan.
    urlbase -- prefix prepended to each matching href.

    Returns a list of address strings, in the order findAll() yields the
    matching anchors.
    """
    response = urllib.urlopen(url)
    try:
        html_src = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    mysoup = BeautifulSoup(html_src, fromEncoding="gbk")
    addresses = []
    for anchor in mysoup.findAll("a", href=re.compile('^lpxx-xs-1.+')):
        # Only anchors that carry no text are kept.
        if anchor.getText() == '':
            # Look the href up by name rather than assuming it is the
            # first attribute of the tag (attrs[0]); findAll() already
            # guarantees that the attribute is present.
            addresses.append(urlbase + str(anchor['href']))
    return addresses

def getEntryListFromUrl(url):
    """Return the text of every matching link on the page at ``url``.

    The page is downloaded and parsed as GBK. The text of each ``<a>`` tag
    whose ``href`` matches ``^lpxx-xs-2\\.jsp`` is collected, in the order
    findAll() yields the anchors.

    url -- address of the page to scan.

    Returns a list of the anchors' text values.
    """
    response = urllib.urlopen(url)
    try:
        html_src = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    mysoup = BeautifulSoup(html_src, fromEncoding="gbk")
    # Raw string: same pattern as before, without relying on Python
    # passing the unknown escape "\." through unchanged.
    link_pattern = re.compile(r'^lpxx-xs-2\.jsp')
    return [anchor.getText() for anchor in mysoup.findAll("a", href=link_pattern)]

def ParsefromUrl2File(url, filename):
    """Scrape the listing at ``url`` and append dated rows to ``filename``.

    Steps:
      1. getBuildingAddress() finds the building pages linked from ``url``.
      2. getEntryListFromUrl() is called on each one and the returned
         values are concatenated into a single flat list.
      3. A header line is written if ``filename`` does not exist yet.
      4. The flat list is cut into consecutive groups as wide as the
         header, and each group is appended as one tab-separated line
         prefixed with today's date.

    url      -- address of the project overview page.
    filename -- path of the tab-separated output file (appended to).
    """
    urlbase = "http://www.hzfc365.com/house_view/"
    tDate = u'日期'                # date
    tBuildingNo = u'幢号'          # building number
    tSoldNo = u'已售套数'          # units sold
    tReservedNo = u'已预定套数'    # units reserved
    tAvaliableNo = u'可售套数'     # units available
    tType = u'房屋用途'            # property usage
    tHeaderList = [tBuildingNo, tSoldNo, tReservedNo, tAvaliableNo, tType]

    resultlist = []
    for addr in getBuildingAddress(url, urlbase):
        resultlist.extend(getEntryListFromUrl(addr))

    writeHeader2db(filename, getEntryStr(tDate, tHeaderList))

    # One date for the whole run, so every row of a run carries the same
    # stamp even if the run crosses midnight.
    today = getDate()
    # Each row holds one value per header column (five).
    row_width = len(tHeaderList)
    # NOTE(review): if the scraped list is not a multiple of row_width the
    # last row is written short, as it was before -- confirm that the page
    # always yields complete groups.
    for start in range(0, len(resultlist), row_width):
        row = resultlist[start:start + row_width]
        write2db(filename, getEntryStr(today, row))


def main():
    """Scrape each configured project page into its own output file.

    Every (url, filename) pair below is passed to ParsefromUrl2File(), in
    the order listed.
    """
    targets = (
        ("http://www.hzfc365.com/house_view/lpxx.jsp?pid=108802",
         '/home/zhenghwa/analysis_dir/qiantang_hz.db'),
        ("http://www.hzfc365.com/house_view/lpxx.jsp?pid=108152",
         '/home/zhenghwa/analysis_dir/bandao_hz.db'),
        ("http://www.hzfc365.com/house_view/lpxx.jsp?pid=107302",
         '/home/zhenghwa/analysis_dir/huanyu_hz.db'),
    )
    for url, filename in targets:
        ParsefromUrl2File(url, filename)
	
# Run the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
	main()
